Data Packages

Baseine pacakges for our analysis

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(janitor)

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(here)
here() starts at U:/DS241/bikeshare_23
library(openmeteo)

Loading the Bikeshare Raw Data

df1 <- read_csv("data_raw/202309-capitalbikeshare-tripdata.csv", show_col_types = FALSE) %>% clean_names()

Creating Dataframe 2 - Interjoing

df2s=df1 %>% 
  select(rideable_type,member_casual,
                    contains("start"),ride_id)%>% 
  mutate(start_stop="start")%>%
  rename(t=started_at,
        station_name=start_station_name,
        station_id=start_station_id,
        lat=start_lat,
        lng=start_lng)

df2e=df1 %>%
select(ride_id,rideable_type,member_casual,
                    contains("end")) %>%
  mutate(start_stop="stop") %>%
  rename(t=ended_at,
        station_name=end_station_name,
        station_id=end_station_id,
        lat=end_lat,
        lng=end_lng)

df2=bind_rows(df2s,df2e) %>%
  arrange(t) %>%
  mutate(rider_delta=(start_stop=="start")*2-1) %>% #change in ridership 
  mutate(riders=cumsum(rider_delta)) %>%
  relocate(riders,.after=t)

Plotting the Dataframe

df2 %>% 
  ggplot(aes(t,riders)) +
  geom_line()

Creating a subsampled dataset

df_s=df2 %>% slice_head(n=1000)

Why looking at slicing to every one hundredth data point will be bad? - Will likely track features however will jump data points whether the data is sparse or not. Want the data to be equally spaced in time.

Starting with 100 rows to get the calculation right and then see if oyu can do it with a bigger data set. Data stored in number of seconds based on a reference point

Round down to the nearest 10 mins, relocate t_f next to t, and filter the data points by t_f

df_e=df_s |>
  mutate(t_f=floor_date(t, "10 mins")) %>%
   relocate(t_f,.after=t) %>%
  slice_head(n=1,by=t_f)

Applying to Previous Manipulations to the Entire Data Set

df_r=df2 |>
  mutate(t_f=floor_date(t,"10 mins")) %>%
  relocate(t_f,.after=t) %>%
  slice_head(n=1,by=t_f)

Creating the Associated Plot


p1=df2 %>% 
  filter(day(t)==18) %>%
  ggplot(aes(x=t, y=riders)) +
  geom_line() +
  ggtitle("Riders on 18Sep")

p1+
  geom_line(data=df_r %>% filter(day(t)==18),
  color="red")

df_r=df2 |>
  mutate(t_f=floor_date(t,"1 mins")) %>%
  relocate(t_f,.after=t) %>%
  slice_head(n=1,by=t_f)
p1=df2 %>% 
  filter(day(t)==18) %>%
  ggplot(aes(x=t, y=riders)) +
  geom_line() +
  ggtitle("Riders on 18Sep")

p1+
  geom_line(data=df_r %>% filter(day(t)==18),
  color="red")

% Get Weather Data

df_w=weather_history("Washington",
                    start = "2023-09-01",
                    end = "2023-09-30",
                    hourly = c("apparent_temperature",
                            "wind_speed_10m",
                            "precipitation")
)
`geocode()` has matched "Washington" to:
Washington in Washington, D.C., United States
Population: 601723
Co-ordinates: c(38.89511, -77.03637)

Merging bike and weather data

df_s=df2 %>% slice_sample(n=1000)
df_j=df_s %>% left_join(df_w,
                        by=join_by(closest(t>=datetime)))
df_j=df_s %>% 
   left_join(df_w,by=join_by(closest(t>=datetime)))  %>%
   relocate(datetime, .after=t)
 
head(df_j)

Investigating the Time Zone Mismatch Between the Bikeshare and Weather Datasets

df_j$t[1:5]
[1] "2023-09-05 07:07:28 UTC" "2023-09-22 11:00:32 UTC" "2023-09-14 10:05:05 UTC" "2023-09-09 09:17:47 UTC"
[5] "2023-09-08 15:10:02 UTC"
df_j$datetime[1:5]
[1] "2023-09-05 03:00:00 EDT" "2023-09-22 07:00:00 EDT" "2023-09-14 06:00:00 EDT" "2023-09-09 05:00:00 EDT"
[5] "2023-09-08 11:00:00 EDT"
Sys.timezone()
[1] "America/New_York"
df_s_est <- force_tz(df_s, tzone = "America/New_York")
df_j_est=df_s_est %>% 
   left_join(df_w,by=join_by(closest(t>=datetime)))  %>%
   relocate(datetime, .after=t)
 
head(df_j_est)
df_j_est$datetime[1:5]
[1] "2023-09-05 07:00:00 EDT" "2023-09-22 11:00:00 EDT" "2023-09-14 10:00:00 EDT" "2023-09-09 09:00:00 EDT"
[5] "2023-09-08 15:00:00 EDT"
df2$t[1:5]
[1] "2023-09-01 00:00:44 UTC" "2023-09-01 00:01:48 UTC" "2023-09-01 00:01:48 UTC" "2023-09-01 00:02:03 UTC"
[5] "2023-09-01 00:02:21 UTC"
force_tz(df2$t[1:5],"America/New_York")
[1] "2023-09-01 00:00:44 EDT" "2023-09-01 00:01:48 EDT" "2023-09-01 00:01:48 EDT" "2023-09-01 00:02:03 EDT"
[5] "2023-09-01 00:02:21 EDT"
df2c=df2 %>% mutate(t=force_tz(t,tzone="America/New_York")) #corrected
 
df_s2=df2c %>% slice_sample(n=1000)
 
df_j2=df_s2 %>% 
   left_join(df_w,by=join_by(closest(t>=datetime)))  %>%
   relocate(datetime, .after=t)
 
head(df_j2)
NA
NA
dfc=df2c %>% 
  left_join(df_w,by=join_by(closest(t>=datetime)))  %>%
   relocate(datetime, .after=t) %>%
  rename(atemp=hourly_apparent_temperature,
         wind=hourly_wind_speed_10m,
         prec=hourly_precipitation)

A Visualization of the Data

Coloring when the rain is greater than 1- color each point based on whether it was raining or not

p2=dfc %>%
  ggplot(aes(x=t,y=riders,color=prec>1)) +
  geom_point()
p2

NA
p3 = dfc %>%
  filter(day(t)==23) %>%
  ggplot(aes(t,riders,color=wind))+
  geom_point() +
  ggtitle("Riders vs. Precipitation on Sep 10")

p3

plotly::ggplotly(p3)
LS0tDQp0aXRsZTogIkVYUEVSSU1FTlQgNF9iaWtlc2hhcmUgZGF0YSAiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIERhdGEgUGFja2FnZXMgDQoNCkJhc2VpbmUgcGFjYWtnZXMgZm9yIG91ciBhbmFseXNpcw0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeShoZXJlKQ0KbGlicmFyeShvcGVubWV0ZW8pDQpgYGANCg0KDQojIExvYWRpbmcgdGhlIEJpa2VzaGFyZSBSYXcgRGF0YQ0KYGBge3J9DQpkZjEgPC0gcmVhZF9jc3YoImRhdGFfcmF3LzIwMjMwOS1jYXBpdGFsYmlrZXNoYXJlLXRyaXBkYXRhLmNzdiIsIHNob3dfY29sX3R5cGVzID0gRkFMU0UpICU+JSBjbGVhbl9uYW1lcygpDQpgYGANCg0KIyBDcmVhdGluZyBEYXRhZnJhbWUgMiAtIEludGVyam9pbmcgDQpgYGB7cn0NCmRmMnM9ZGYxICU+JSANCiAgc2VsZWN0KHJpZGVhYmxlX3R5cGUsbWVtYmVyX2Nhc3VhbCwNCiAgICAgICAgICAgICAgICAJY29udGFpbnMoInN0YXJ0IikscmlkZV9pZCklPiUgDQogIG11dGF0ZShzdGFydF9zdG9wPSJzdGFydCIpJT4lDQogIHJlbmFtZSh0PXN0YXJ0ZWRfYXQsDQogICAgIAlzdGF0aW9uX25hbWU9c3RhcnRfc3RhdGlvbl9uYW1lLA0KICAgICAJc3RhdGlvbl9pZD1zdGFydF9zdGF0aW9uX2lkLA0KICAgICAJbGF0PXN0YXJ0X2xhdCwNCiAgICAgCWxuZz1zdGFydF9sbmcpDQoNCmRmMmU9ZGYxICU+JQ0Kc2VsZWN0KHJpZGVfaWQscmlkZWFibGVfdHlwZSxtZW1iZXJfY2FzdWFsLA0KICAgICAgICAgICAgICAgIAljb250YWlucygiZW5kIikpICU+JQ0KICBtdXRhdGUoc3RhcnRfc3RvcD0ic3RvcCIpICU+JQ0KICByZW5hbWUodD1lbmRlZF9hdCwNCiAgICAgCXN0YXRpb25fbmFtZT1lbmRfc3RhdGlvbl9uYW1lLA0KICAgICAJc3RhdGlvbl9pZD1lbmRfc3RhdGlvbl9pZCwNCiAgICAgCWxhdD1lbmRfbGF0LA0KICAgICAJbG5nPWVuZF9sbmcpDQoNCmRmMj1iaW5kX3Jvd3MoZGYycyxkZjJlKSAlPiUNCiAgYXJyYW5nZSh0KSAlPiUNCiAgbXV0YXRlKHJpZGVyX2RlbHRhPShzdGFydF9zdG9wPT0ic3RhcnQiKSoyLTEpICU+JSAjY2hhbmdlIGluIHJpZGVyc2hpcCANCiAgbXV0YXRlKHJpZGVycz1jdW1zdW0ocmlkZXJfZGVsdGEpKSAlPiUNCiAgcmVsb2NhdGUocmlkZXJzLC5hZnRlcj10KQ0KDQpgYGANCg0KIyBQbG90dGluZyB0aGUgRGF0YWZyYW1lDQoNCmBgYHtyfQ0KZGYyICU+JSANCiAgZ2dwbG90KGFlcyh0LHJpZGVycykpICsNCiAgZ2VvbV9saW5lKCkNCmBgYA0KIyBDcmVhdGluZyBhIHN1YnNhbXBsZWQgZGF0YXNldCANCg0KYGBge3J9DQpkZl9zPWRmMiAlPiUgc2xpY2VfaGVhZChuPTEwMDApDQoNCmBgYA0KDQpXaHkgbG9va2luZyBhdCBzbGljaW5nIHRvIGV2ZXJ5IG9uZSBodW5kcmVkdGggZGF0YSBwb2ludCB3aWxsIGJlIGJhZD8gLSBXaWxsIGxpa2VseSB0cmFjayBmZWF0dXJlcyBob3dldmVyIHdpbGwganVtcCBkYXRhIHBvaW50cyB3aGV0aGVyIHRoZSBkYXRhIGlzIHNwYXJzZSBvciBub3QuIFdhbnQgdGhlIGRhdGEgdG8gYmUgZXF1YWxseSBzcGFjZWQgaW4gdGltZS4NCg0KU3RhcnRpbmcgd2l0aCAxMDAgcm93cyB0byBnZXQgdGhlIGNhbGN1bGF0aW9uIHJpZ2h0IGFuZCB0aGVuIHNlZSBpZiBveXUgY2FuIGRvIGl0IHdpdGggYSBiaWdnZXIgZGF0YSBzZXQuIA0KRGF0YSBzdG9yZWQgaW4gbnVtYmVyIG9mIHNlY29uZHMgYmFzZWQgb24gYSByZWZlcmVuY2UgcG9pbnQNCg0KDQpSb3VuZCBkb3duIHRvIHRoZSBuZWFyZXN0IDEwIG1pbnMsIHJlbG9jYXRlIHRfZiBuZXh0IHRvIHQsIGFuZCBmaWx0ZXIgdGhlIGRhdGEgcG9pbnRzIGJ5IHRfZg0KDQpgYGB7cn0NCmRmX2U9ZGZfcyB8Pg0KICBtdXRhdGUodF9mPWZsb29yX2RhdGUodCwgIjEwIG1pbnMiKSkgJT4lDQogICByZWxvY2F0ZSh0X2YsLmFmdGVyPXQpICU+JQ0KICBzbGljZV9oZWFkKG49MSxieT10X2YpDQpgYGANCg0KDQojIEFwcGx5aW5nIHRvIFByZXZpb3VzIE1hbmlwdWxhdGlvbnMgdG8gdGhlIEVudGlyZSBEYXRhIFNldCANCmBgYHtyfQ0KZGZfcj1kZjIgfD4NCiAgbXV0YXRlKHRfZj1mbG9vcl9kYXRlKHQsIjEwIG1pbnMiKSkgJT4lDQogIHJlbG9jYXRlKHRfZiwuYWZ0ZXI9dCkgJT4lDQogIHNsaWNlX2hlYWQobj0xLGJ5PXRfZikNCmBgYA0KDQogDQojIENyZWF0aW5nIHRoZSBBc3NvY2lhdGVkIFBsb3QgDQpgYGB7cn0NCg0KcDE9ZGYyICU+JSANCiAgZmlsdGVyKGRheSh0KT09MTgpICU+JQ0KICBnZ3Bsb3QoYWVzKHg9dCwgeT1yaWRlcnMpKSArDQogIGdlb21fbGluZSgpICsNCiAgZ2d0aXRsZSgiUmlkZXJzIG9uIDE4U2VwIikNCg0KcDErDQogIGdlb21fbGluZShkYXRhPWRmX3IgJT4lIGZpbHRlcihkYXkodCk9PTE4KSwNCiAgY29sb3I9InJlZCIpDQoNCmBgYA0KIA0KDQpgYGB7cn0NCmRmX3I9ZGYyIHw+DQogIG11dGF0ZSh0X2Y9Zmxvb3JfZGF0ZSh0LCIxIG1pbnMiKSkgJT4lDQogIHJlbG9jYXRlKHRfZiwuYWZ0ZXI9dCkgJT4lDQogIHNsaWNlX2hlYWQobj0xLGJ5PXRfZikNCg0KYGBgDQoNCg0KYGBge3J9DQpwMT1kZjIgJT4lIA0KICBmaWx0ZXIoZGF5KHQpPT0xOCkgJT4lDQogIGdncGxvdChhZXMoeD10LCB5PXJpZGVycykpICsNCiAgZ2VvbV9saW5lKCkgKw0KICBnZ3RpdGxlKCJSaWRlcnMgb24gMThTZXAiKQ0KDQpwMSsNCiAgZ2VvbV9saW5lKGRhdGE9ZGZfciAlPiUgZmlsdGVyKGRheSh0KT09MTgpLA0KICBjb2xvcj0icmVkIikNCmBgYA0KDQolIEdldCBXZWF0aGVyIERhdGENCmBgYHtyfQ0KZGZfdz13ZWF0aGVyX2hpc3RvcnkoIldhc2hpbmd0b24iLA0KICAgICAgICAgICAgICAgIAlzdGFydCA9ICIyMDIzLTA5LTAxIiwNCiAgICAgICAgICAgICAgICAJZW5kID0gIjIwMjMtMDktMzAiLA0KICAgICAgICAgICAgICAgIAlob3VybHkgPSBjKCJhcHBhcmVudF90ZW1wZXJhdHVyZSIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAJIndpbmRfc3BlZWRfMTBtIiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgIAkicHJlY2lwaXRhdGlvbiIpDQopDQoNCmBgYA0KIyBNZXJnaW5nIGJpa2UgYW5kIHdlYXRoZXIgZGF0YQ0KDQpgYGB7cn0NCmRmX3M9ZGYyICU+JSBzbGljZV9zYW1wbGUobj0xMDAwKQ0KZGZfaj1kZl9zICU+JSBsZWZ0X2pvaW4oZGZfdywNCiAgICAgICAgICAgICAgICAgICAgIAlieT1qb2luX2J5KGNsb3Nlc3QodD49ZGF0ZXRpbWUpKSkNCmBgYA0KDQoNCmBgYHtyfQ0KZGZfaj1kZl9zICU+JSANCiAgIGxlZnRfam9pbihkZl93LGJ5PWpvaW5fYnkoY2xvc2VzdCh0Pj1kYXRldGltZSkpKSAgJT4lDQogICByZWxvY2F0ZShkYXRldGltZSwgLmFmdGVyPXQpDQogDQpoZWFkKGRmX2opDQpgYGANCg0KIyMgSW52ZXN0aWdhdGluZyB0aGUgVGltZSBab25lIE1pc21hdGNoIEJldHdlZW4gdGhlIEJpa2VzaGFyZSBhbmQgV2VhdGhlciBEYXRhc2V0cyANCg0KYGBge3J9DQpkZl9qJHRbMTo1XQ0KZGZfaiRkYXRldGltZVsxOjVdDQoNCmBgYA0KDQpgYGB7cn0NClN5cy50aW1lem9uZSgpDQpgYGANCg0KYGBge3J9DQpkZl9zX2VzdCA8LSBmb3JjZV90eihkZl9zLCB0em9uZSA9ICJBbWVyaWNhL05ld19Zb3JrIikNCmBgYA0KDQoNCmBgYHtyfQ0KZGZfal9lc3Q9ZGZfc19lc3QgJT4lIA0KICAgbGVmdF9qb2luKGRmX3csYnk9am9pbl9ieShjbG9zZXN0KHQ+PWRhdGV0aW1lKSkpICAlPiUNCiAgIHJlbG9jYXRlKGRhdGV0aW1lLCAuYWZ0ZXI9dCkNCiANCmhlYWQoZGZfal9lc3QpDQpgYGANCg0KYGBge3J9DQpkZl9qX2VzdCR0WzE6NV0NCmRmX2pfZXN0JGRhdGV0aW1lWzE6NV0NCmBgYA0KYGBge3J9DQpkZjIkdFsxOjVdDQpmb3JjZV90eihkZjIkdFsxOjVdLCJBbWVyaWNhL05ld19Zb3JrIikNCmBgYA0KDQpgYGB7cn0NCmRmMmM9ZGYyICU+JSBtdXRhdGUodD1mb3JjZV90eih0LHR6b25lPSJBbWVyaWNhL05ld19Zb3JrIikpICNjb3JyZWN0ZWQNCiANCmRmX3MyPWRmMmMgJT4lIHNsaWNlX3NhbXBsZShuPTEwMDApDQogDQpkZl9qMj1kZl9zMiAlPiUgDQogICBsZWZ0X2pvaW4oZGZfdyxieT1qb2luX2J5KGNsb3Nlc3QodD49ZGF0ZXRpbWUpKSkgICU+JQ0KICAgcmVsb2NhdGUoZGF0ZXRpbWUsIC5hZnRlcj10KQ0KIA0KaGVhZChkZl9qMikNCg0KYGBgDQoNCmBgYHtyfQ0KZGZjPWRmMmMgJT4lIA0KICBsZWZ0X2pvaW4oZGZfdyxieT1qb2luX2J5KGNsb3Nlc3QodD49ZGF0ZXRpbWUpKSkgJT4lDQogICByZWxvY2F0ZShkYXRldGltZSwgLmFmdGVyPXQpICU+JQ0KICByZW5hbWUoYXRlbXA9aG91cmx5X2FwcGFyZW50X3RlbXBlcmF0dXJlLA0KICAgICAgICAgd2luZD1ob3VybHlfd2luZF9zcGVlZF8xMG0sDQogICAgICAgICBwcmVjPWhvdXJseV9wcmVjaXBpdGF0aW9uKQ0KYGBgDQoNCg0KIyMgQSBWaXN1YWxpemF0aW9uIG9mIHRoZSBEYXRhDQoNCkNvbG9yaW5nIHdoZW4gdGhlIHJhaW4gaXMgZ3JlYXRlciB0aGFuIDEtIGNvbG9yIGVhY2ggcG9pbnQgYmFzZWQgb24gd2hldGhlciBpdCB3YXMgcmFpbmluZyBvciBub3QgDQoNCmBgYHtyfQ0KcDI9ZGZjICU+JQ0KICBnZ3Bsb3QoYWVzKHg9dCx5PXJpZGVycyxjb2xvcj1wcmVjPjEpKSArDQogIGdlb21fcG9pbnQoKQ0KcDINCiAgDQpgYGANCg0KDQpgYGB7cn0NCnAzID0gZGZjICU+JQ0KICBmaWx0ZXIoZGF5KHQpPT0yMykgJT4lDQogIGdncGxvdChhZXModCxyaWRlcnMsY29sb3I9d2luZCkpKw0KICBnZW9tX3BvaW50KCkgKw0KICBnZ3RpdGxlKCJSaWRlcnMgdnMuIFByZWNpcGl0YXRpb24gb24gU2VwIDEwIikNCg0KcDMNCg0KYGBgDQoNCg0KYGBge3J9DQpwbG90bHk6OmdncGxvdGx5KHAzKQ0KYGBgDQoNCg0KDQoNCg0KDQoNCg==